Q2_code

## install.packages("tidytuesdayR")
library(tidyverse)
library(janitor)
library(forcats)
library(dplyr)
library(patchwork)
# Download the TidyTuesday release dated 2025-01-21.
tuesdata <- tidytuesdayR::tt_load("2025-01-21")

# Both questions only rely on exped_tidy. The peaks_tidy line is kept
# (commented out) in case variables from that data frame are needed later.
exped_tidy <- tuesdata$exped_tidy |>
  clean_names()
#peaks_tidy <- tuesdata$peaks_tidy |>
#  clean_names()

# Preview the first ten expeditions.
exped_tidy |>
  head(n = 10)
# A tibble: 10 × 69
   expid     peakid  year season season_factor  host host_factor route1   route2
   <chr>     <chr>  <dbl>  <dbl> <chr>         <dbl> <chr>       <chr>    <chr> 
 1 EVER20101 EVER    2020      1 Spring            2 China       N Col-N… <NA>  
 2 EVER20102 EVER    2020      1 Spring            2 China       N Col-N… <NA>  
 3 EVER20103 EVER    2020      1 Spring            2 China       N Col-N… <NA>  
 4 AMAD20301 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
 5 AMAD20302 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
 6 AMAD20303 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
 7 AMAD20304 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
 8 AMAD20305 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
 9 AMAD20306 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
10 AMAD20307 AMAD    2020      3 Autumn            1 Nepal       SW Ridge <NA>  
# ℹ 60 more variables: route3 <lgl>, route4 <lgl>, nation <chr>, leaders <chr>,
#   sponsor <chr>, success1 <lgl>, success2 <lgl>, success3 <lgl>,
#   success4 <lgl>, ascent1 <chr>, ascent2 <chr>, ascent3 <lgl>, ascent4 <lgl>,
#   claimed <lgl>, disputed <lgl>, countries <chr>, approach <chr>,
#   bcdate <date>, smtdate <date>, smttime <chr>, smtdays <dbl>, totdays <dbl>,
#   termdate <date>, termreason <dbl>, termreason_factor <chr>, termnote <chr>,
#   highpoint <dbl>, traverse <lgl>, ski <lgl>, parapente <lgl>, camps <dbl>, …
#head(peaks_tidy, n=10)
# Add per-expedition death counts and death rates.
# mutate() evaluates its arguments in order, so ptotdeaths can use the
# totdeaths column created just before it.
exped_tidy <- mutate(
  exped_tidy,
  # member deaths as a share of all members
  pmdeaths = mdeaths / totmembers,
  # hired-staff deaths as a share of all hired staff
  phdeaths = hdeaths / tothired,
  # deaths across members and hired staff
  totdeaths = hdeaths + mdeaths,
  # deaths as a share of everyone on the expedition
  ptotdeaths = totdeaths / (tothired + totmembers)
)

# Keep expeditions with at least one member or hired-staff death and a
# recorded agency, then narrow to the columns used in the rest of the analysis.
exped_tidy_deadly <- exped_tidy |>
  filter(
    pmdeaths > 0 | phdeaths > 0,
    !is.na(agency)
  ) |>
  select(
    year, season_factor, host_factor, nation, agency,
    totmembers, smtmembers, mdeaths,
    tothired, hdeaths,
    totdeaths, pmdeaths, phdeaths, ptotdeaths
  )
exped_tidy_deadly
# A tibble: 37 × 14
    year season_factor host_factor nation agency   totmembers smtmembers mdeaths
   <dbl> <chr>         <chr>       <chr>  <chr>         <dbl>      <dbl>   <dbl>
 1  2021 Spring        Nepal       USA    TAGnepa…         35         10       0
 2  2021 Spring        Nepal       Russia 7 Summi…         15          7       0
 3  2021 Spring        Nepal       India  Seven S…         37         22       2
 4  2021 Spring        Nepal       China  Seven S…         13          9       0
 5  2021 Autumn        Nepal       Nepal  TAGnepa…          5          0       1
 6  2021 Autumn        Nepal       France Pralhad…          4          0       3
 7  2022 Spring        Nepal       Greece Seven S…          7          3       1
 8  2022 Spring        Nepal       USA    Beyul A…         10          8       0
 9  2022 Spring        Nepal       Russia 7 Summi…         15         12       1
10  2022 Spring        Nepal       Nepal  High Fi…          7          6       1
# ℹ 27 more rows
# ℹ 6 more variables: tothired <dbl>, hdeaths <dbl>, totdeaths <dbl>,
#   pmdeaths <dbl>, phdeaths <dbl>, ptotdeaths <dbl>
# Character vector of agencies that ran at least one fatal expedition.
deadly_agencies <- unique(exped_tidy_deadly$agency)

# Character vector of agencies that never appear among the fatal expeditions.
# Rows without a recorded agency are dropped first, mirroring the
# !is.na(agency) filter used to build exped_tidy_deadly. Without this,
# `NA %in% deadly_agencies` is FALSE, so NA would pass the negated test and be
# counted as a "non-deadly agency".
non_deadly <- exped_tidy |>
  filter(!is.na(agency), !agency %in% deadly_agencies)
non_deadly_agencies <- unique(non_deadly$agency)

#prints out the length for each so that I can view these values. Will not be used for graphing
length(non_deadly_agencies)
[1] 160
length(deadly_agencies)
[1] 22
# Bar chart of the number of fatal expeditions per agency.
# fct_infreq() orders agencies by frequency and fct_rev() reverses that order,
# so after the coordinate flip the most frequent agency sits at the top.
# The fill is mapped to the computed bar count via after_stat(count).
ggplot(
  exped_tidy_deadly,
  aes(x = fct_rev(fct_infreq(agency)), fill = after_stat(count))
) +
  geom_bar() +
  # horizontal bars keep the long agency names readable
  coord_flip() +
  # a tick every two expeditions
  scale_y_continuous(breaks = seq(0, 10, by = 2)) +
  # yellow for low counts rising to dark red for the deadliest agencies
  scale_fill_gradient(low = "#ffce00", high = "darkred") +
  labs(
    title = "Number of expeditions through the Himalayas \nthat resulted in death by Agency",
    subtitle = "from 2021 - 2024",
    x = NULL,
    y = "Number of expeditions that resulted in at least one death",
    fill = NULL,
    caption = "Source: Tidytuesday"
  ) +
  theme_minimal() +
  # drop the legend and most grid lines to improve readability
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )

# Fatal expeditions that took place in 2021.
deaths_2021 <- filter(exped_tidy_deadly, year == 2021)
deaths_2021
# A tibble: 6 × 14
   year season_factor host_factor nation agency    totmembers smtmembers mdeaths
  <dbl> <chr>         <chr>       <chr>  <chr>          <dbl>      <dbl>   <dbl>
1  2021 Spring        Nepal       USA    TAGnepal…         35         10       0
2  2021 Spring        Nepal       Russia 7 Summit…         15          7       0
3  2021 Spring        Nepal       India  Seven Su…         37         22       2
4  2021 Spring        Nepal       China  Seven Su…         13          9       0
5  2021 Autumn        Nepal       Nepal  TAGnepal…          5          0       1
6  2021 Autumn        Nepal       France Pralhad …          4          0       3
# ℹ 6 more variables: tothired <dbl>, hdeaths <dbl>, totdeaths <dbl>,
#   pmdeaths <dbl>, phdeaths <dbl>, ptotdeaths <dbl>
# Average total death rate per agency in 2021.
# Season is part of the grouping so it stays available as the fill colour;
# a rate (rather than a count) makes agencies of different sizes comparable.
deaths_2021_av <- deaths_2021 |>
  group_by(agency, season_factor) |>
  summarise(
    avg_ptotdeaths = mean(ptotdeaths, na.rm = TRUE),
    .groups = "drop"
  )
deaths_2021_av
# A tibble: 5 × 3
  agency                                      season_factor avg_ptotdeaths
  <chr>                                       <chr>                  <dbl>
1 7 Summits Adventure                         Spring                0.0222
2 Pralhad Chapagain (Freelancer at Expes.com) Autumn                0.75  
3 Seven Summit Treks                          Spring                0.0356
4 TAGnepal Trekking                           Spring                0.0118
5 TAGnepal Trekking (Snowy Horizon pmt)       Autumn                0.111 
# Raw number of deaths per agency and season in 2021 (same grouping as the
# average-rate table).
deaths_2021_raw <- deaths_2021 |>
  group_by(agency, season_factor) |>
  summarise(
    total_deaths = sum(totdeaths),
    .groups = "drop"
  )
deaths_2021_raw
# A tibble: 5 × 3
  agency                                      season_factor total_deaths
  <chr>                                       <chr>                <dbl>
1 7 Summits Adventure                         Spring                   1
2 Pralhad Chapagain (Freelancer at Expes.com) Autumn                   3
3 Seven Summit Treks                          Spring                   3
4 TAGnepal Trekking                           Spring                   1
5 TAGnepal Trekking (Snowy Horizon pmt)       Autumn                   1
# label: 2021 percent death graph

# Horizontal bar chart of the average total death rate per agency in 2021.
# Agencies are ordered by their average rate and bars are coloured by season.
deaths_2021_av |>
  ggplot(aes(
    x = fct_reorder(agency, avg_ptotdeaths, .desc = FALSE),
    y = avg_ptotdeaths,
    fill = season_factor
  )) +
  geom_col() +

  # fixed 0-100% range for better comparison between groups
  coord_flip(ylim = c(0, 1)) +

  # colours follow the usual season association; the palette is named so the
  # season-to-colour mapping does not depend on the order of the levels
  scale_fill_manual(values = c(Autumn = "orange", Spring = "lightgreen")) +
  labs(
    fill = "Season",
    x = "Trekking Agency",
    # the axis shows an average percentage, not a raw death count
    y = "Total Percent Death (Average)",
    title = "Percent total deaths by Agency in 2021",
    caption = "Source: TidyTuesday",
    subtitle = "M is trekking member death, H is hired staff death"
  ) +

  # show the value axis as percentages
  scale_y_continuous(
    breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1),
    labels = c("0%", "20%", "40%", "60%", "80%", "100%")
  ) +

  # hand-placed labels giving each individual trek's death percentage (rather
  # than the average), distinguishing member (M) from hired-staff (H) deaths
  annotate("text", y = 0.2, x = 1, label = "M 6.6%") +
  annotate("text", y = 0.2, x = 2, label = "H 3.3%") +
  annotate("text", y = 0.3, x = 3, label = "Trek 1: M 54%") +
  annotate("text", y = 0.7, x = 3, label = "Trek 2: H 10%") +
  annotate("text", y = 0.3, x = 4, label = "M 20%") +
  annotate("text", y = 0.4, x = 5, label = "M 75%") +
  theme_minimal() +

  # remove the grid so the annotations stay readable
  theme(
    axis.ticks.x = element_blank(),
    panel.grid = element_blank()
  )
# Horizontal bar chart of the raw number of deaths per agency in 2021.
# Same layout as the percent chart, but with death counts on the value axis.
deaths_2021_raw |>
  ggplot(aes(
    x = fct_reorder(agency, total_deaths, .desc = FALSE),
    y = total_deaths,
    fill = season_factor
  )) +
  geom_col() +

  # fixed 0-5 range for easier comparison between groups
  coord_flip(ylim = c(0, 5)) +

  # same colours as the percent chart for easy comparison
  scale_fill_manual(values = c("orange", "lightgreen")) +

  labs(
    fill = "Season",
    x = "Trekking Agency",
    y = "Total Deaths",
    # this chart shows counts, so the title must not say "Percent"
    title = "Total deaths by Agency in 2021",
    caption = "Source: TidyTuesday",
    subtitle = "M is trekking member death, H is hired staff death"
  ) +

  # one tick per death
  scale_y_continuous(
    breaks = c(0, 1, 2, 3, 4, 5)
  ) +

  # Per-trek M/H labels were dropped here: most treks had only 1 or 2 deaths,
  # so those labels were superfluous and messy. Only the number of treks is
  # annotated instead.
  annotate("text", y = 4, x = 5, label = "Total Treks: 2") +
  theme_minimal() +

  # remove the grid for better readability
  theme(
    axis.ticks.x = element_blank(),
    panel.grid = element_blank()
  )

All code below is copied and pasted from the graphs above, with the year and the specific annotations changed.

# Fatal expeditions that took place in 2022.
deaths_2022 <- filter(exped_tidy_deadly, year == 2022)
deaths_2022
# A tibble: 10 × 14
    year season_factor host_factor nation   agency totmembers smtmembers mdeaths
   <dbl> <chr>         <chr>       <chr>    <chr>       <dbl>      <dbl>   <dbl>
 1  2022 Spring        Nepal       Greece   Seven…          7          3       1
 2  2022 Spring        Nepal       USA      Beyul…         10          8       0
 3  2022 Spring        Nepal       Russia   7 Sum…         15         12       1
 4  2022 Spring        Nepal       Nepal    High …          7          6       1
 5  2022 Spring        Nepal       Nepal    Pione…         13          9       1
 6  2022 Spring        Nepal       S Korea  Seven…          8          0       0
 7  2022 Autumn        Nepal       Georgia  Sator…         16         12       1
 8  2022 Autumn        Nepal       Netherl… Seven…         15          0       0
 9  2022 Autumn        Nepal       USA      Shang…          5          2       1
10  2022 Autumn        Nepal       USA      Sator…         10          0       0
# ℹ 6 more variables: tothired <dbl>, hdeaths <dbl>, totdeaths <dbl>,
#   pmdeaths <dbl>, phdeaths <dbl>, ptotdeaths <dbl>
# Average total death rate per agency and season in 2022.
deaths_2022_av <- deaths_2022 |>
  group_by(agency, season_factor) |>
  summarise(
    avg_ptotdeaths = mean(ptotdeaths, na.rm = TRUE),
    .groups = "drop"
  )
deaths_2022_av
# A tibble: 8 × 3
  agency                                        season_factor avg_ptotdeaths
  <chr>                                         <chr>                  <dbl>
1 7 Summits Adventure                           Spring                0.0263
2 Beyul Adventure                               Spring                0.0323
3 High Five Adventures (Pioneer Adventures pmt) Spring                0.1   
4 Pioneer Adventure                             Spring                0.0333
5 Satori Adventures                             Autumn                0.0406
6 Seven Summit Treks                            Autumn                0.0333
7 Seven Summit Treks                            Spring                0.0565
8 Shangri-La Nepal Treks                        Autumn                0.0833
# Raw number of deaths per agency and season in 2022.
deaths_2022_raw <- deaths_2022 |>
  group_by(agency, season_factor) |>
  summarise(
    total_deaths = sum(totdeaths),
    .groups = "drop"
  )
deaths_2022_raw
# A tibble: 8 × 3
  agency                                        season_factor total_deaths
  <chr>                                         <chr>                <dbl>
1 7 Summits Adventure                           Spring                   1
2 Beyul Adventure                               Spring                   1
3 High Five Adventures (Pioneer Adventures pmt) Spring                   1
4 Pioneer Adventure                             Spring                   1
5 Satori Adventures                             Autumn                   2
6 Seven Summit Treks                            Autumn                   1
7 Seven Summit Treks                            Spring                   2
8 Shangri-La Nepal Treks                        Autumn                   1
# Horizontal bar chart of the average total death rate per agency in 2022,
# ordered by rate and coloured by season.
ggplot(
  deaths_2022_av,
  aes(
    x = fct_reorder(agency, avg_ptotdeaths, .desc = FALSE),
    y = avg_ptotdeaths,
    fill = season_factor
  )
) +
  geom_col() +
  # fixed 0-100% range so the yearly charts share one scale
  coord_flip(ylim = c(0, 1)) +
  scale_fill_manual(values = c("orange", "lightgreen")) +
  # value axis shown as percentages in steps of 20
  scale_y_continuous(
    breaks = 0:5 / 5,
    labels = paste0(seq(0, 100, by = 20), "%")
  ) +
  labs(
    title = "Percent total deaths by Agency in 2022",
    subtitle = "M is trekking member death, H is hired staff death",
    x = "Trekking Agency",
    y = "Total Percent Death (Average)",
    fill = "Season",
    caption = "Source: TidyTuesday"
  ) +
  # hand-placed per-trek labels; the three vectors are matched by position
  annotate(
    "text",
    x = c(1, 2, 3, 4, 4, 4.8, 5.2, 5, 6, 7),
    y = c(0.2, 0.2, 0.2, 0.3, 0.7, 0.32, 0.32, 0.8, 0.25, 0.3),
    label = c(
      "H 20%",
      "H 4.7%",
      "M 7.6%",
      "Trek 1: M 6.25%",
      "Trek 2: H 10%",
      "Trek 1: M 14.2%",
      "Trek 2: H 6.25%",
      "Trek 3: H 6.67%",
      "M 20%",
      "M 14.28%"
    )
  ) +
  theme_minimal() +
  theme(
    panel.grid = element_blank(),
    axis.ticks.x = element_blank()
  )

# Horizontal bar chart of the raw number of deaths per agency in 2022,
# ordered by death count and coloured by season.
deaths_2022_raw |>
  ggplot(aes(
    x = fct_reorder(agency, total_deaths, .desc = FALSE),
    y = total_deaths,
    fill = season_factor
  )) +
  geom_col() +
  # fixed 0-5 range so the yearly raw charts share one scale
  coord_flip(ylim = c(0, 5)) +
  scale_fill_manual(values = c("orange", "lightgreen")) +
  labs(
    fill = "Season",
    x = "Trekking Agency",
    # this chart shows death counts, so the axis label and title must not
    # describe percentages
    y = "Total Death",
    title = "Total deaths by Agency in 2022",
    caption = "Source: TidyTuesday",
    subtitle = "M is trekking member death, H is hired staff death"
  ) +
  # one tick per death
  scale_y_continuous(
    breaks = c(0, 1, 2, 3, 4, 5)
  ) +
  # Per-trek M/H labels are not used on the raw chart; only agencies with
  # more than one fatal trek get a trek-count label.
  annotate("text", y = 4, x = 6, label = "Total Treks: 3") +
  annotate("text", y = 3, x = 7, label = "Total Treks: 2") +
  theme_minimal() +
  theme(
    axis.ticks.x = element_blank(),
    panel.grid = element_blank()
  )

# Fatal expeditions that took place in 2023.
deaths_2023 <- filter(exped_tidy_deadly, year == 2023)
deaths_2023
# A tibble: 15 × 14
    year season_factor host_factor nation   agency totmembers smtmembers mdeaths
   <dbl> <chr>         <chr>       <chr>    <chr>       <dbl>      <dbl>   <dbl>
 1  2023 Spring        Nepal       Pakistan Seven…         21         12       1
 2  2023 Spring        Nepal       Nepal    Imagi…         15         10       0
 3  2023 Spring        Nepal       USA      Beyul…          6          3       1
 4  2023 Spring        Nepal       USA      Himal…         19         12       1
 5  2023 Spring        Nepal       Nepal    Asian…         13         10       2
 6  2023 Spring        Nepal       China    8K Ex…         17          6       1
 7  2023 Spring        Nepal       Denmark  Seven…         30         14       2
 8  2023 Spring        Nepal       India    Pione…         15          6       2
 9  2023 Spring        Nepal       Vietnam  Exped…         10          3       1
10  2023 Spring        Nepal       France   Glaci…          5          1       1
11  2023 Spring        Nepal       Nepal    Annap…          1          0       1
12  2023 Spring        Nepal       Nepal    Peak …          8          5       0
13  2023 Spring        Nepal       Germany  Seven…         16          8       1
14  2023 Autumn        Nepal       Czech R… 14 Su…          6          2       1
15  2023 Autumn        Nepal       Russia   Himal…          3          0       1
# ℹ 6 more variables: tothired <dbl>, hdeaths <dbl>, totdeaths <dbl>,
#   pmdeaths <dbl>, phdeaths <dbl>, ptotdeaths <dbl>
# Average total death rate per agency and season in 2023.
deaths_2023_av <- deaths_2023 |>
  group_by(agency, season_factor) |>
  summarise(
    avg_ptotdeaths = mean(ptotdeaths, na.rm = TRUE),
    .groups = "drop"
  )
deaths_2023_av
# A tibble: 13 × 3
   agency                 season_factor avg_ptotdeaths
   <chr>                  <chr>                  <dbl>
 1 14 Summits             Autumn                0.167 
 2 8K Expeditions         Spring                0.0270
 3 Annapurna Treks        Spring                1     
 4 Asian Trekking         Spring                0.05  
 5 Beyul Adventure        Spring                0.0476
 6 Expedition Himalaya    Spring                0.0333
 7 Glacier Himalaya Treks Spring                0.1   
 8 Himalayan Guides       Autumn                0.333 
 9 Himalayan Guides       Spring                0.0204
10 Imagine Nepal          Spring                0.0667
11 Peak Promotion         Spring                0.125 
12 Pioneer Adventure      Spring                0.0667
13 Seven Summit Treks     Spring                0.0281
# Raw number of deaths per agency and season in 2023.
deaths_2023_raw <- deaths_2023 |>
  group_by(agency, season_factor) |>
  summarise(
    total_deaths = sum(totdeaths),
    .groups = "drop"
  )
deaths_2023_raw
# A tibble: 13 × 3
   agency                 season_factor total_deaths
   <chr>                  <chr>                <dbl>
 1 14 Summits             Autumn                   1
 2 8K Expeditions         Spring                   1
 3 Annapurna Treks        Spring                   2
 4 Asian Trekking         Spring                   2
 5 Beyul Adventure        Spring                   1
 6 Expedition Himalaya    Spring                   1
 7 Glacier Himalaya Treks Spring                   1
 8 Himalayan Guides       Autumn                   1
 9 Himalayan Guides       Spring                   1
10 Imagine Nepal          Spring                   3
11 Peak Promotion         Spring                   2
12 Pioneer Adventure      Spring                   2
13 Seven Summit Treks     Spring                   4
# Horizontal bar chart of the average total death rate per agency in 2023,
# ordered by rate and coloured by season.
ggplot(
  deaths_2023_av,
  aes(
    x = fct_reorder(agency, avg_ptotdeaths, .desc = FALSE),
    y = avg_ptotdeaths,
    fill = season_factor
  )
) +
  geom_col() +
  # fixed 0-100% range so the yearly charts share one scale
  coord_flip(ylim = c(0, 1)) +
  scale_fill_manual(values = c("orange", "lightgreen")) +
  # value axis shown as percentages in steps of 20
  scale_y_continuous(
    breaks = 0:5 / 5,
    labels = paste0(seq(0, 100, by = 20), "%")
  ) +
  labs(
    title = "Percent total deaths by Agency in 2023",
    subtitle = "M is trekking member death, H is hired staff death",
    x = "Trekking Agency",
    y = "Total Percent Death (Average)",
    fill = "Season",
    caption = "Source: TidyTuesday"
  ) +
  # hand-placed per-trek labels; the three vectors are matched by position
  annotate(
    "text",
    x = c(1, 2, 2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 12),
    y = c(
      0.2, 0.2, 0.5, 0.8, 0.2, 0.2, 0.2, 0.2,
      0.2, 0.2, 0.23, 0.25, 0.5, 0.8, 0.5
    ),
    label = c(
      "M 5.8%",
      "Trek 1: M 4.7%",
      "Trek 2: M 6.7%",
      "Trek 3: 6.3%",
      "M 10%",
      "M 1.66%",
      "M 15.3%",
      "H 10%",
      "M 13.3%",
      "M 20%",
      "H 25%",
      "M 16.7%",
      "Trek 1: M 5.3%",
      "Trek 2: M 33.3%",
      "M 100%"
    )
  ) +
  theme_minimal() +
  theme(
    panel.grid = element_blank(),
    axis.ticks.x = element_blank()
  )

# Horizontal bar chart of the raw number of deaths per agency in 2023,
# ordered by death count and coloured by season.
ggplot(
  deaths_2023_raw,
  aes(
    x = fct_reorder(agency, total_deaths, .desc = FALSE),
    y = total_deaths,
    fill = season_factor
  )
) +
  geom_col() +
  # fixed 0-5 range so the yearly raw charts share one scale
  coord_flip(ylim = c(0, 5)) +
  scale_fill_manual(values = c("orange", "lightgreen")) +
  # one tick per death
  scale_y_continuous(breaks = 0:5) +
  labs(
    title = "Total deaths by Agency in 2023",
    subtitle = "M is trekking member death, H is hired staff death",
    x = "Trekking Agency",
    y = "Total Death",
    fill = "Season",
    caption = "Source: TidyTuesday"
  ) +
  # trek-count labels for agencies with more than one fatal trek
  annotate(
    "text",
    x = c(6, 12),
    y = c(3, 3),
    label = c("Total Treks: 2", "Total Treks: 3")
  ) +
  theme_minimal() +
  theme(
    panel.grid = element_blank(),
    axis.ticks.x = element_blank()
  )

# Fatal expeditions that took place in 2024.
deaths_2024 <- filter(exped_tidy_deadly, year == 2024)
deaths_2024
# A tibble: 6 × 14
   year season_factor host_factor nation  agency   totmembers smtmembers mdeaths
  <dbl> <chr>         <chr>       <chr>   <chr>         <dbl>      <dbl>   <dbl>
1  2024 Spring        Nepal       Italy   Seven S…         22         12       1
2  2024 Spring        Nepal       Ukraine 8K Expe…         36         29       4
3  2024 Spring        Nepal       Nepal   Yeti Ad…          2          1       1
4  2024 Spring        Nepal       UK      Makalu …         15          7       1
5  2024 Spring        Nepal       Nepal   Seven S…         27         15       0
6  2024 Spring        Nepal       France  Snowy H…         10          5       1
# ℹ 6 more variables: tothired <dbl>, hdeaths <dbl>, totdeaths <dbl>,
#   pmdeaths <dbl>, phdeaths <dbl>, ptotdeaths <dbl>
# Average total death rate per agency and season in 2024.
deaths_2024_av <- deaths_2024 |>
  group_by(agency, season_factor) |>
  summarise(
    avg_ptotdeaths = mean(ptotdeaths, na.rm = TRUE),
    .groups = "drop"
  )
deaths_2024_av
# A tibble: 5 × 3
  agency              season_factor avg_ptotdeaths
  <chr>               <chr>                  <dbl>
1 8K Expeditions      Spring                0.0581
2 Makalu Adventure    Spring                0.0333
3 Seven Summit Treks  Spring                0.0279
4 Snowy Horizon Treks Spring                0.0370
5 Yeti Adventure      Spring                0.333 
# Raw number of deaths per agency and season in 2024.
deaths_2024_raw <- deaths_2024 |>
  group_by(agency, season_factor) |>
  summarise(
    total_deaths = sum(totdeaths),
    .groups = "drop"
  )
deaths_2024_raw
# A tibble: 5 × 3
  agency              season_factor total_deaths
  <chr>               <chr>                <dbl>
1 8K Expeditions      Spring                   5
2 Makalu Adventure    Spring                   1
3 Seven Summit Treks  Spring                   3
4 Snowy Horizon Treks Spring                   1
5 Yeti Adventure      Spring                   1
# Horizontal bar chart of the average total death rate per agency in 2024,
# ordered by rate. A single fill colour is supplied for this year.
ggplot(
  deaths_2024_av,
  aes(
    x = fct_reorder(agency, avg_ptotdeaths, .desc = FALSE),
    y = avg_ptotdeaths,
    fill = season_factor
  )
) +
  geom_col() +
  # fixed 0-100% range so the yearly charts share one scale
  coord_flip(ylim = c(0, 1)) +
  scale_fill_manual(values = "lightgreen") +
  # value axis shown as percentages in steps of 20
  scale_y_continuous(
    breaks = 0:5 / 5,
    labels = paste0(seq(0, 100, by = 20), "%")
  ) +
  labs(
    title = "Percent total deaths by Agency in 2024",
    subtitle = "M is trekking member death, H is hired staff death",
    x = "Trekking Agency",
    y = "Total Percent Death (Average)",
    fill = "Season",
    caption = "Source: TidyTuesday"
  ) +
  # hand-placed per-trek labels; the three vectors are matched by position
  annotate(
    "text",
    x = c(2, 1, 1, 3, 4, 5),
    y = c(0.2, 0.3, 0.7, 0.2, 0.2, 0.5),
    label = c(
      "M 6.7%",
      "Trek 1: M 4.5% H 3.3%",
      "Trek 2: H 3.2%",
      "M 10%",
      "M 11.1% H 2%",
      "M 50%"
    )
  ) +
  theme_minimal() +
  theme(
    panel.grid = element_blank(),
    axis.ticks.x = element_blank()
  )

# Horizontal bar chart of the raw number of deaths per agency in 2024,
# ordered by death count. A single fill colour is supplied for this year.
ggplot(
  deaths_2024_raw,
  aes(
    x = fct_reorder(agency, total_deaths, .desc = FALSE),
    y = total_deaths,
    fill = season_factor
  )
) +
  geom_col() +
  # fixed 0-5 range so the yearly raw charts share one scale
  coord_flip(ylim = c(0, 5)) +
  scale_fill_manual(values = "lightgreen") +
  # one tick per death
  scale_y_continuous(breaks = 0:5) +
  labs(
    title = "Total deaths by Agency in 2024",
    subtitle = "M is trekking member death, H is hired staff death",
    x = "Trekking Agency",
    y = "Total Death",
    fill = "Season",
    caption = "Source: TidyTuesday"
  ) +
  # trek-count label for the agency with more than one fatal trek
  annotate("text", x = 4, y = 4, label = "Total Treks: 2") +
  theme_minimal() +
  theme(
    panel.grid = element_blank(),
    axis.ticks.x = element_blank()
  )

We were not able to use patchwork to combine the graphs because their y values differ.

#combined_plot <- wrap_plots(percent_2021, raw_2021, percent_2022, raw_2022, percent_2023, raw_2023, percent_2024, raw_2024, ncol = 2) 

#combined_plot
#final_2021 <- (percent_2021 + raw_2021)
#final_2021